#!/usr/bin/env bash

# Report files written by the machine-level collectors in this script. Both
# live under /tmp/collect and embed the name printed by `hostname`:
#   machine_collection_file   - machine metadata followed by usage rows
#   machine_disk_detail_file  - iotop / iostat snapshots
declare -g machine_collection_file="/tmp/collect/$(hostname)-node.data"
declare -g machine_disk_detail_file="/tmp/collect/$(hostname)-node-disk-detail.data"

: '''
Machine Metadata:
+-------------------------------------------------------------------------------------+
| Field            | Type       | Label | Comment                                     |
+-------------------------------------------------------------------------------------+
| machine_id       | string     |       | uid of machine                              |
| architecture     | string     |       | architecture of machine                     |
| model            | string     |       | cpu Model name                              |
| cpu_num          | string     |       | number of cpu on a machine                  |
| mem_size         | string     |       | memory size                                 |
| swap_size        | string     |       | swap size                                   |
| l1cache          | string     |       | L1 cache                                    |
| l2cache          | string     |       | L2 cache                                    |
| l3cache          | string     |       | L3 cache                                    |
+-------------------------------------------------------------------------------------+
'''
# Append a description of this host to ${machine_collection_file}: a
# "Machine Metadata:" title, the machine ID (hostname plus /etc/machine-id),
# `uname -a`, the CPU model name, the on-line CPU list, MemTotal, SwapTotal and
# the L1/L2/L3 cache lines reported by lscpu, terminated by an empty line.
# Globals:   machine_collection_file (read)
# Arguments: none
# Every probe is best effort: a probe that finds nothing contributes no line.
function collect_machine_metadata() {
    # lscpu is queried once and filtered several times below.
    local cpu_info
    cpu_info=$(lscpu)

    # A single quoted redirection for the whole group, so a report path that
    # contains whitespace works and the file is opened only once.
    {
        echo "Machine Metadata:"
        echo "machine ID: $(hostname)-$(cat /etc/machine-id)"
        uname -a
        grep "model name" /proc/cpuinfo | uniq
        grep "On-line CPU(s) list" <<< "${cpu_info}"
        grep MemTotal /proc/meminfo
        grep SwapTotal /proc/meminfo
        grep L1 <<< "${cpu_info}" | grep cache
        grep L2 <<< "${cpu_info}" | grep cache
        grep L3 <<< "${cpu_info}" | grep cache
        echo ""
    } >> "${machine_collection_file}"
}

: '''
Machine Usage:
+--------------------------------------------------------------------------------------------+
| Field            | Type       | Label | Comment                                            |
+--------------------------------------------------------------------------------------------+
| time_stamp       | double     |       | time stamp, in second                              |
| cpu_util_percent | bigint     |       | [0, 100]                                           |
| mem_util_percent | bigint     |       | [0, 100]                                           |
| cpi              | double     |       | cycles per instruction (optional)                  |
| mkpi             | bigint     |       | cache misses per second (optional)                 |
| llc              | bigint     |       | llc occupancy, in KB (optional)                    |
| lmb              | double     |       | local memory bandwidth (optional)                  |
| rmb              | double     |       | remote memory bandwidth (optional)                 |
| net_in           | map bigint |       | incoming network traffic, bps                      |
| net_out          | map bigint |       | outgoing network traffic, bps                      |
| disk_io          | string     |       | disk io (read/write)                               |
+--------------------------------------------------------------------------------------------+
'''
# Append one tab-separated usage row to ${machine_collection_file}:
#   timestamp, cpu %, mem %, cpi, mkpi, llc, lmb, rmb, net, disk_read,disk_write
# Globals:   machine_collection_file (read)
# Arguments: none
# The five pqos-derived columns are -1 when `pqos -d` fails or when the
# monitoring output does not contain the corresponding column.
function collect_machine_usage() {
    local timestamp cpu_util_percent mem_used_percent
    timestamp=$(date "+%Y-%m-%d %H:%M:%S")
    # tail -1 keeps the Cpu(s) line of the second top iteration; the awk
    # program turns its idle percentage into a busy percentage.
    cpu_util_percent=$(top -b -n2 | grep -F "Cpu(s)" | tail -1 | awk -F'id,' '{split($1, vs, ","); v=vs[length(vs)]; sub(/\s+/, "", v);sub(/\s+/, "", v); printf "%d", 100-v;}')
    mem_used_percent=$(free -m | awk -F '[ :]+' 'NR==2{printf "%d", ($3)/$2*100}')

    # perf stat -e cache-misses --timeout 1000
    local cpi=-1 mkpi=-1 llc=-1 lmb=-1 rmb=-1

    if pqos -d > /dev/null 2>&1; then
        # CORE    IPC   MISSES    LLC[KB]  MBL[MB/s]  MBR[MB/s]
        # 0-7   0.28    7893k      383.2      901.2      430.8
        # MISSES is the number of LLC missing in 1 second
        local cpus rtd
        cpus=$(lscpu | grep "On-line CPU(s) list" | awk '{print $NF}')
        rtd=$(pqos -m "all:[${cpus}]" -i 10 -t 2)

        # Only the last output line is parsed. The expansion keeps the
        # newlines intact; an unquoted echo would flatten the whole report
        # into one line and shift every column.
        local -a cols=()
        read -r -a cols <<< "${rtd##*$'\n'}"

        local ipc=${cols[1]:-}
        local number_re='^[0-9]*\.?[0-9]+$'
        if [[ ${ipc} =~ ${number_re} ]]; then
            # CPI is the reciprocal of IPC; an IPC of 0 stays "unavailable".
            cpi=$(awk -v ipc="${ipc}" 'BEGIN { if (ipc > 0) printf "%.2f", 1 / ipc; else printf "-1" }')
        fi

        mkpi=${cols[2]:--1}
        llc=${cols[3]:--1}
        lmb=${cols[4]:--1}
        rmb=${cols[5]:--1}
    fi

    # specify the network device
    local net
    net=$(collect_network_device_traffic 1)

    # One iotop sample provides both totals, so they describe the same instant.
    local io_totals disk_read disk_write
    io_totals=$(iotop -b -o -n 1 | grep "Total DISK READ" | tr ":" " ")
    disk_read=$(awk '{print $4 $5}' <<< "${io_totals}")
    disk_write=$(awk '{print $10 $11}' <<< "${io_totals}")

    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "${timestamp}" "${cpu_util_percent}" "${mem_used_percent}" \
        "${cpi}" "${mkpi}" "${llc}" "${lmb}" "${rmb}" \
        "${net}" "${disk_read},${disk_write}" >> "${machine_collection_file}"
}

# Append one snapshot of `iotop -b -o -n 1` followed by `iostat -d -m -t -x`
# to ${machine_disk_detail_file}, terminated by an empty line.
# Globals:   machine_disk_detail_file (read)
# Arguments: none
function collect_disk_io_detail() {
    local disk_io_usage disk_io_stat
    disk_io_usage=$(iotop -b -o -n 1)
    disk_io_stat=$(iostat -d -m -t -x)

    # printf '%s' writes the captured text verbatim; `echo -e` would expand
    # backslash sequences that happen to appear in the tools' output.
    printf '%s\n%s\n\n' "${disk_io_usage}" "${disk_io_stat}" >> "${machine_disk_detail_file}"
}

: '''
Container Metadata:
+-------------------------------------------------------------------------------------+
| Field            | Type       | Label | Comment                                     |
+-------------------------------------------------------------------------------------+
| container_id     | string     |       | uid of container                            |
| machine_id       | string     |       | uid of container host                       |
| image            | string     |       | container image                             |
| container_unit   | string     |       | container unit name                         |
| pod_name         | string     |       | name of pod container belongs to            |
| cpu_request      | bigint     |       | cpu shares request, 1000 means 1 core       |
| cpu_limit        | bigint     |       | 1000 means 1 core                           |
| memory_limit     | bigint     |       | in bytes                                    |
| storage          | string     |       | mounts of container                         |
+-------------------------------------------------------------------------------------+
'''
# Write the static description of one container, once, to
# /tmp/collect/<full container id>-container.data. One value per line, in the
# order: title, container id, machine id, image, container unit, pod name,
# cpu request, cpu limit, memory limit, mount sources; then an empty line and
# the column header for the usage rows.
# Arguments: $1 - container ID (short or full) understood by `docker inspect`
# Returns:   1 when the container ID cannot be resolved, 0 otherwise
# Does nothing when the container's file already exists.
function collect_container_metadata() {
    local short_id=$1
    local long_id
    long_id=$(docker inspect -f '{{.Id}}' "${short_id}")
    if [[ -z "${long_id}" ]]; then
        # Without an ID every failed lookup would end up in "-container.data".
        return 1
    fi

    local container_collection_file="/tmp/collect/${long_id}-container.data"
    if [[ -e "${container_collection_file}" ]]; then
        return
    fi

    # cpu_request
    # cpu_shares = max(2, spec.containers[].resources.requests.cpu(core) * 1024)
    local cpu_shares cpu_request
    cpu_shares=$(docker inspect -f '{{.HostConfig.CpuShares}}' "${long_id}")
    cpu_request=$(round "$(echo "${cpu_shares}*1000/1024" | bc -l)" 0)

    # cpu_limit: stays 0 unless both period and quota are positive integers,
    # so an unset or negative quota is not reported as a "-0" limit.
    local cpu_period cpu_quota cpu_limit=0
    local positive_re='^[1-9][0-9]*$'
    cpu_period=$(docker inspect -f '{{.HostConfig.CpuPeriod}}' "${long_id}")
    cpu_quota=$(docker inspect -f '{{.HostConfig.CpuQuota}}' "${long_id}")
    if [[ ${cpu_period} =~ ${positive_re} && ${cpu_quota} =~ ${positive_re} ]]; then
        cpu_limit=$(round "$(echo "${cpu_quota}*1000/${cpu_period}" | bc -l)" 0)
    fi

    # storage: mount sources, re-joined with single spaces. Splitting through
    # `read -a` avoids the pathname expansion an unquoted echo would perform.
    local -a mounts=()
    read -r -a mounts <<< "$(docker inspect -f '{{range .Mounts}}{{.Source}} {{end}}' "${long_id}")"

    {
        echo "Container Metadata:"
        # container_id
        echo "${long_id}"
        # machine_id
        echo "$(hostname)-$(cat /etc/machine-id)"
        # image; echo keeps one line per field even when the query prints nothing
        echo "$(docker inspect -f '{{.Config.Image}}' "${long_id}")"
        # container_unit
        echo "$(docker inspect -f '{{index .Config.Labels "io.kubernetes.container.name"}}' "${long_id}")"
        # pod_name
        echo "$(docker inspect -f '{{index .Config.Labels "io.kubernetes.pod.name"}}' "${long_id}")"
        echo "${cpu_request}"
        echo "${cpu_limit}"
        # memory_limit
        echo "$(docker inspect -f '{{.HostConfig.Memory}}' "${long_id}")"
        echo "${mounts[*]}"

        echo ""
        printf 'DATE\tCPU USAGE\tMEM USAGE\tCPI\tMKPI\tLLC\tLMB\tRMB\tNET I/O\tDISK I/O\n'
    } >> "${container_collection_file}"
}

: '''
Container Usage:
+--------------------------------------------------------------------------------------------+
| Field            | Type       | Label | Comment                                            |
+--------------------------------------------------------------------------------------------+
| time_stamp       | double     |       | time stamp, in second                              |
| cpu_usage        | bigint     |       | cpu usage, 1000 means 1 core                       |
| memory_usage     | bigint     |       | in bytes                                           |
| cpi              | double     |       | cycles per instruction (optional)                  |
| mkpi             | bigint     |       | cache misses per second (optional)                 |
| llc              | bigint     |       | llc occupancy, in KB (optional)                    |
| lmb              | double     |       | local memory bandwidth, in MB/s (optional)         |
| rmb              | double     |       | remote memory bandwidth, in MB/s (optional)        |
| net_in           | double     |       | incoming network traffic, bps                      |
| net_out          | double     |       | outgoing network traffic, bps                      |
| disk_io          | map bigint |       | disk io bps (read/write)                           |
+--------------------------------------------------------------------------------------------+
'''
# Append one tab-separated usage row for a container to
# /tmp/collect/<full container id>-container.data:
#   timestamp, cpu usage, memory usage, cpi, mkpi, llc, lmb, rmb, net, disk io
# Arguments: $1 - container ID (short or full) understood by `docker inspect`
# Does nothing when the container's file does not exist yet. The five
# pqos-derived columns are -1 when `pqos -d` fails, when the container has no
# tasks, or when the monitoring output lacks the corresponding column.
function collect_container_usage() {
    local short_id=$1
    local long_id
    long_id=$(docker inspect -f '{{.Id}}' "${short_id}")
    local container_collection_file="/tmp/collect/${long_id}-container.data"

    if [[ ! -e "${container_collection_file}" ]]; then
        return
    fi

    # An empty CgroupParent falls back to "/docker".
    local cgroup_parent
    cgroup_parent=$(docker inspect -f '{{.HostConfig.CgroupParent}}' "${short_id}")
    cgroup_parent="${cgroup_parent:-/docker}/${long_id}"

    local timestamp cpu_usage memory_usage
    timestamp=$(date "+%Y-%m-%d %H:%M:%S")
    cpu_usage=$(collect_container_cpu_usage "${cgroup_parent}")
    memory_usage=$(cat "/sys/fs/cgroup/memory/${cgroup_parent}/memory.usage_in_bytes")

    local cpi=-1 mkpi=-1 llc=-1 lmb=-1 rmb=-1

    if pqos -d > /dev/null 2>&1; then
        # Comma-separated list of the task IDs in the container's cgroup.
        local pids
        pids=$(cat "/sys/fs/cgroup/cpuacct/${cgroup_parent}/tasks" | tr "\n" ",")
        pids=${pids%,}

        if [[ -n "${pids}" ]]; then
            # PID       CORE      IPC   MISSES    LLC[KB]  [ MBL[MB/s]  MBR[MB/s]   ]
            # 1,2,4,6, 0,2,13   0.28    7893k      383.2   [  901.2      430.8      ]
            local res
            res=$(pqos -p "all:[${pids}]" -I -i 10 -t 2)

            # Only the last output line is parsed. The expansion keeps the
            # newlines intact; an unquoted echo would flatten the report into
            # one line and shift every column.
            local -a cols=()
            read -r -a cols <<< "${res##*$'\n'}"

            local ipc=${cols[2]:-}
            local number_re='^[0-9]*\.?[0-9]+$'
            if [[ ${ipc} =~ ${number_re} ]]; then
                # CPI is the reciprocal of IPC; an IPC of 0 stays "unavailable".
                cpi=$(awk -v ipc="${ipc}" 'BEGIN { if (ipc > 0) printf "%.2f", 1 / ipc; else printf "-1" }')
            fi

            mkpi=${cols[3]:--1}
            llc=${cols[4]:--1}
            lmb=${cols[5]:--1}
            rmb=${cols[6]:--1}
        fi
    fi

    local pid net disk_io
    pid=$(docker inspect -f '{{.State.Pid}}' "${long_id}")
    net=$(collect_network_device_traffic "${pid}")
    disk_io=$(collect_blkio "${cgroup_parent}")

    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "${timestamp}" "${cpu_usage}" "${memory_usage}" \
        "${cpi}" "${mkpi}" "${llc}" "${lmb}" "${rmb}" \
        "${net}" "${disk_io}" >> "${container_collection_file}"
}

# Print the CPU usage of a cgroup over a one-second window, scaled so that
# 1000 means one fully used core: the growth of cpuacct.usage divided by the
# elapsed `date +%s%N` time, times 1000, rounded to an integer.
# Arguments: $1 - cgroup path below /sys/fs/cgroup/cpuacct
# Outputs:   the usage on stdout, or -1 when a sample could not be read
# Returns:   1 when -1 was printed, otherwise the status of the final echo
function collect_container_cpu_usage() {
    local usage_file="/sys/fs/cgroup/cpuacct/$1/cpuacct.usage"
    # Declared local so the samples do not leak into the caller's scope.
    local tstart cstart tend cend
    local uint_re='^[0-9]+$'

    tstart=$(date +%s%N)
    cstart=$(cat "${usage_file}")
    sleep 1
    tend=$(date +%s%N)
    cend=$(cat "${usage_file}")

    # An unreadable counter or a clock that did not advance would otherwise
    # be fed to bc as a malformed or divide-by-zero expression.
    if ! [[ ${tstart} =~ ${uint_re} && ${tend} =~ ${uint_re} \
        && ${cstart} =~ ${uint_re} && ${cend} =~ ${uint_re} ]] \
        || (( tend <= tstart )); then
        echo -1
        return 1
    fi

    round "$(echo "(${cend}-${cstart})*1000/(${tend}-${tstart})" | bc -l)" 0
    echo
}

# Print the bytes received and transmitted during a one-second window by every
# eth* and ens* interface listed in /proc/<pid>/net/dev.
# Arguments: $1 - process ID whose net/dev table is read
# Outputs:   "<device>:<rx delta>,<tx delta>" entries joined by ";" on stdout
#            (an empty line when no interface matches)
function collect_network_device_traffic() {
    local net_file="/proc/$1/net/dev"
    local net=""

    # Baselines are keyed by interface name, so an interface that appears or
    # disappears during the window cannot shift the pairing of the samples.
    local -A rx_before=() tx_before=()
    local line device rx_bytes tx_bytes

    while IFS= read -r line; do
        # Turn the first ':' into a space so that "eth0:123" (no blank after
        # the colon) splits the same way as "eth0: 123". Fields 2 and 10 of
        # the split line are taken as the rx and tx byte counters.
        read -r device rx_bytes _ _ _ _ _ _ _ tx_bytes _ <<< "${line/:/ }"
        if [[ ${device} != eth* && ${device} != ens* ]]; then
            continue
        fi
        rx_before[${device}]=${rx_bytes}
        tx_before[${device}]=${tx_bytes}
    done < "${net_file}"

    sleep 1

    local prev_rx prev_tx
    while IFS= read -r line; do
        read -r device rx_bytes _ _ _ _ _ _ _ tx_bytes _ <<< "${line/:/ }"
        if [[ ${device} != eth* && ${device} != ens* ]]; then
            continue
        fi
        # No baseline: the interface showed up during the window.
        if [[ -z ${rx_before[${device}]+set} ]]; then
            continue
        fi
        prev_rx=${rx_before[${device}]}
        prev_tx=${tx_before[${device}]}

        net="${net};${device}:$((rx_bytes - prev_rx)),$((tx_bytes - prev_tx))"
    done < "${net_file}"

    # Drop the leading ";" separator.
    echo "${net#*;}"
}

# Print the bytes read and written per block device by a cgroup during a
# one-second window, taken from blkio.throttle.io_service_bytes.
# Arguments: $1 - cgroup path below the blkio hierarchy
#            $2 - (optional) blkio hierarchy root, default /sys/fs/cgroup/blkio
# Outputs:   "<device>:<read delta>,<write delta>" entries joined by ";" on
#            stdout (an empty line when there is nothing to report)
# Returns:   1 when the statistics file is not readable
# A device without a baseline sample is compared against 0.
function collect_blkio() {
    local blkio_root=${2:-/sys/fs/cgroup/blkio}
    local blkio_file="${blkio_root}/$1/blkio.throttle.io_service_bytes"
    local blkio=""

    if [[ ! -r "${blkio_file}" ]]; then
        echo ""
        return 1
    fi

    local -A read_before=() write_before=()
    local device op bytes previous

    # The first three whitespace-separated fields of a line are the device,
    # the operation name and the byte counter. Only "Read" and "Write" lines
    # are used; everything else is ignored.
    while read -r device op bytes; do
        case "${op}" in
            Read) read_before[${device}]=${bytes} ;;
            Write) write_before[${device}]=${bytes} ;;
        esac
    done < "${blkio_file}"

    sleep 1

    while read -r device op bytes; do
        case "${op}" in
            Read)
                previous=${read_before[${device}]:-0}
                # A Read line opens a new ";device:read" entry ...
                blkio="${blkio};${device}:$((bytes - previous))"
                ;;
            Write)
                previous=${write_before[${device}]:-0}
                # ... and the Write line that follows completes it.
                blkio="${blkio},$((bytes - previous))"
                ;;
        esac
    done < "${blkio_file}"

    # Drop the leading ";" separator.
    echo "${blkio#*;}"
}

# Create the two machine report files (including their parent directories),
# write the machine metadata and the usage column header, then collect usage
# rows and disk I/O details forever.
# Globals:   machine_collection_file, machine_disk_detail_file (read)
# Arguments: none
# Never returns.
function collect_machine_loop() {
    # touch cannot create a file inside a directory that does not exist yet.
    mkdir -p -- "$(dirname -- "${machine_collection_file}")" \
        "$(dirname -- "${machine_disk_detail_file}")"
    touch -- "${machine_collection_file}" "${machine_disk_detail_file}"

    collect_machine_metadata
    printf 'DATE\tCPU USAGE PERCENT\tMEM USAGE PERCENT\tCPI\tMKPI\tLLC\tLMB\tRMB\tNET\tDISK I/O\n' >> "${machine_collection_file}"
    while true; do
        collect_machine_usage
        collect_disk_io_detail
    done
}

# Forever: list the running containers with `docker ps`, skip every row that
# mentions "pause", and collect metadata and one usage row for each remaining
# container.
# Arguments: none
# Never returns.
function collect_container_loop() {
    # Declared local so the loop state does not leak into the global scope.
    local container
    local -a containers

    while true; do
        # First column of every non-"pause" row; tail drops the header row.
        mapfile -t containers < <(docker ps | grep -v pause | awk '{print $1}' | tail -n +2)

        if (( ${#containers[@]} == 0 )); then
            # Nothing to sample: wait instead of calling docker in a tight loop.
            sleep 1
            continue
        fi

        for container in "${containers[@]}"; do
            [[ -n "${container}" ]] || continue
            collect_container_metadata "${container}"
            collect_container_usage "${container}"
        done
    done
}

# Print a number formatted with a fixed count of decimal places.
# Arguments: $1 - the number to format
#            $2 - how many digits to keep after the decimal point
# Outputs:   the formatted number on stdout, without a trailing newline
function round() {
    local value="${1}"
    local digits="${2}"
    local format="%.${digits}f"

    printf "${format}" "${value}"
}
